Tidyverse

Elizabeth Valles

23 Nov 2019

Tidyverse

Tidyverse es una colección de paquetes de R diseñados para la ciencia de datos. Fue introducida por Hadley Wickham. Todos los paquetes comparten una filosofía de diseño, gramática y estructuras de datos subyacentes.

* Libro: R for Data Science, https://r4ds.had.co.nz/

Datos en formato tidy

Datos Tidy

Workflow

Transformación de datos con dplyr

Datos sucios

library(tidyverse)
# Base R overview of the data frame: one line per column with its type and
# first values.
str(object = billboard)
## Classes 'spec_tbl_df', 'tbl_df', 'tbl' and 'data.frame': 317 obs. of  79 variables:
##  $ artist      : chr  "2 Pac" "2Ge+her" "3 Doors Down" "3 Doors Down" ...
##  $ track       : chr  "Baby Don't Cry (Keep..." "The Hardest Part Of ..." "Kryptonite" "Loser" ...
##  $ date.entered: Date, format: "2000-02-26" "2000-09-02" ...
##  $ wk1         : num  87 91 81 76 57 51 97 84 59 76 ...
##  $ wk2         : num  82 87 70 76 34 39 97 62 53 76 ...
##  $ wk3         : num  72 92 68 72 25 34 96 51 38 74 ...
##  $ wk4         : num  77 NA 67 69 17 26 95 41 28 69 ...
##  $ wk5         : num  87 NA 66 67 17 26 100 38 21 68 ...
##  $ wk6         : num  94 NA 57 65 31 19 NA 35 18 67 ...
##  $ wk7         : num  99 NA 54 55 36 2 NA 35 16 61 ...
##  $ wk8         : num  NA NA 53 59 49 2 NA 38 14 58 ...
##  $ wk9         : num  NA NA 51 62 53 3 NA 38 12 57 ...
##  $ wk10        : num  NA NA 51 61 57 6 NA 36 10 59 ...
##  $ wk11        : num  NA NA 51 61 64 7 NA 37 9 66 ...
##  $ wk12        : num  NA NA 51 59 70 22 NA 37 8 68 ...
##  $ wk13        : num  NA NA 47 61 75 29 NA 38 6 61 ...
##  $ wk14        : num  NA NA 44 66 76 36 NA 49 1 67 ...
##  $ wk15        : num  NA NA 38 72 78 47 NA 61 2 59 ...
##  $ wk16        : num  NA NA 28 76 85 67 NA 63 2 63 ...
##  $ wk17        : num  NA NA 22 75 92 66 NA 62 2 67 ...
##  $ wk18        : num  NA NA 18 67 96 84 NA 67 2 71 ...
##  $ wk19        : num  NA NA 18 73 NA 93 NA 83 3 79 ...
##  $ wk20        : num  NA NA 14 70 NA 94 NA 86 4 89 ...
##  $ wk21        : num  NA NA 12 NA NA NA NA NA 5 NA ...
##  $ wk22        : num  NA NA 7 NA NA NA NA NA 5 NA ...
##  $ wk23        : num  NA NA 6 NA NA NA NA NA 6 NA ...
##  $ wk24        : num  NA NA 6 NA NA NA NA NA 9 NA ...
##  $ wk25        : num  NA NA 6 NA NA NA NA NA 13 NA ...
##  $ wk26        : num  NA NA 5 NA NA NA NA NA 14 NA ...
##  $ wk27        : num  NA NA 5 NA NA NA NA NA 16 NA ...
##  $ wk28        : num  NA NA 4 NA NA NA NA NA 23 NA ...
##  $ wk29        : num  NA NA 4 NA NA NA NA NA 22 NA ...
##  $ wk30        : num  NA NA 4 NA NA NA NA NA 33 NA ...
##  $ wk31        : num  NA NA 4 NA NA NA NA NA 36 NA ...
##  $ wk32        : num  NA NA 3 NA NA NA NA NA 43 NA ...
##  $ wk33        : num  NA NA 3 NA NA NA NA NA NA NA ...
##  $ wk34        : num  NA NA 3 NA NA NA NA NA NA NA ...
##  $ wk35        : num  NA NA 4 NA NA NA NA NA NA NA ...
##  $ wk36        : num  NA NA 5 NA NA NA NA NA NA NA ...
##  $ wk37        : num  NA NA 5 NA NA NA NA NA NA NA ...
##  $ wk38        : num  NA NA 9 NA NA NA NA NA NA NA ...
##  $ wk39        : num  NA NA 9 NA NA NA NA NA NA NA ...
##  $ wk40        : num  NA NA 15 NA NA NA NA NA NA NA ...
##  $ wk41        : num  NA NA 14 NA NA NA NA NA NA NA ...
##  $ wk42        : num  NA NA 13 NA NA NA NA NA NA NA ...
##  $ wk43        : num  NA NA 14 NA NA NA NA NA NA NA ...
##  $ wk44        : num  NA NA 16 NA NA NA NA NA NA NA ...
##  $ wk45        : num  NA NA 17 NA NA NA NA NA NA NA ...
##  $ wk46        : num  NA NA 21 NA NA NA NA NA NA NA ...
##  $ wk47        : num  NA NA 22 NA NA NA NA NA NA NA ...
##  $ wk48        : num  NA NA 24 NA NA NA NA NA NA NA ...
##  $ wk49        : num  NA NA 28 NA NA NA NA NA NA NA ...
##  $ wk50        : num  NA NA 33 NA NA NA NA NA NA NA ...
##  $ wk51        : num  NA NA 42 NA NA NA NA NA NA NA ...
##  $ wk52        : num  NA NA 42 NA NA NA NA NA NA NA ...
##  $ wk53        : num  NA NA 49 NA NA NA NA NA NA NA ...
##  $ wk54        : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ wk55        : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ wk56        : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ wk57        : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ wk58        : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ wk59        : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ wk60        : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ wk61        : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ wk62        : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ wk63        : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ wk64        : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ wk65        : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ wk66        : logi  NA NA NA NA NA NA ...
##  $ wk67        : logi  NA NA NA NA NA NA ...
##  $ wk68        : logi  NA NA NA NA NA NA ...
##  $ wk69        : logi  NA NA NA NA NA NA ...
##  $ wk70        : logi  NA NA NA NA NA NA ...
##  $ wk71        : logi  NA NA NA NA NA NA ...
##  $ wk72        : logi  NA NA NA NA NA NA ...
##  $ wk73        : logi  NA NA NA NA NA NA ...
##  $ wk74        : logi  NA NA NA NA NA NA ...
##  $ wk75        : logi  NA NA NA NA NA NA ...
##  $ wk76        : logi  NA NA NA NA NA NA ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   year = col_skip(),
##   ..   artist = col_character(),
##   ..   track = col_character(),
##   ..   time = col_skip(),
##   ..   date.entered = col_date(format = ""),
##   ..   wk1 = col_double(),
##   ..   wk2 = col_double(),
##   ..   wk3 = col_double(),
##   ..   wk4 = col_double(),
##   ..   wk5 = col_double(),
##   ..   wk6 = col_double(),
##   ..   wk7 = col_double(),
##   ..   wk8 = col_double(),
##   ..   wk9 = col_double(),
##   ..   wk10 = col_double(),
##   ..   wk11 = col_double(),
##   ..   wk12 = col_double(),
##   ..   wk13 = col_double(),
##   ..   wk14 = col_double(),
##   ..   wk15 = col_double(),
##   ..   wk16 = col_double(),
##   ..   wk17 = col_double(),
##   ..   wk18 = col_double(),
##   ..   wk19 = col_double(),
##   ..   wk20 = col_double(),
##   ..   wk21 = col_double(),
##   ..   wk22 = col_double(),
##   ..   wk23 = col_double(),
##   ..   wk24 = col_double(),
##   ..   wk25 = col_double(),
##   ..   wk26 = col_double(),
##   ..   wk27 = col_double(),
##   ..   wk28 = col_double(),
##   ..   wk29 = col_double(),
##   ..   wk30 = col_double(),
##   ..   wk31 = col_double(),
##   ..   wk32 = col_double(),
##   ..   wk33 = col_double(),
##   ..   wk34 = col_double(),
##   ..   wk35 = col_double(),
##   ..   wk36 = col_double(),
##   ..   wk37 = col_double(),
##   ..   wk38 = col_double(),
##   ..   wk39 = col_double(),
##   ..   wk40 = col_double(),
##   ..   wk41 = col_double(),
##   ..   wk42 = col_double(),
##   ..   wk43 = col_double(),
##   ..   wk44 = col_double(),
##   ..   wk45 = col_double(),
##   ..   wk46 = col_double(),
##   ..   wk47 = col_double(),
##   ..   wk48 = col_double(),
##   ..   wk49 = col_double(),
##   ..   wk50 = col_double(),
##   ..   wk51 = col_double(),
##   ..   wk52 = col_double(),
##   ..   wk53 = col_double(),
##   ..   wk54 = col_double(),
##   ..   wk55 = col_double(),
##   ..   wk56 = col_double(),
##   ..   wk57 = col_double(),
##   ..   wk58 = col_double(),
##   ..   wk59 = col_double(),
##   ..   wk60 = col_double(),
##   ..   wk61 = col_double(),
##   ..   wk62 = col_double(),
##   ..   wk63 = col_double(),
##   ..   wk64 = col_double(),
##   ..   wk65 = col_double(),
##   ..   wk66 = col_logical(),
##   ..   wk67 = col_logical(),
##   ..   wk68 = col_logical(),
##   ..   wk69 = col_logical(),
##   ..   wk70 = col_logical(),
##   ..   wk71 = col_logical(),
##   ..   wk72 = col_logical(),
##   ..   wk73 = col_logical(),
##   ..   wk74 = col_logical(),
##   ..   wk75 = col_logical(),
##   ..   wk76 = col_logical()
##   .. )

# Print the first rows of the table (head() defaults to six).
billboard %>%
  head()
## # A tibble: 6 x 79
##   artist track date.entered   wk1   wk2   wk3   wk4   wk5   wk6   wk7   wk8
##   <chr>  <chr> <date>       <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2 Pac  Baby… 2000-02-26      87    82    72    77    87    94    99    NA
## 2 2Ge+h… The … 2000-09-02      91    87    92    NA    NA    NA    NA    NA
## 3 3 Doo… Kryp… 2000-04-08      81    70    68    67    66    57    54    53
## 4 3 Doo… Loser 2000-10-21      76    76    72    69    67    65    55    59
## 5 504 B… Wobb… 2000-04-15      57    34    25    17    17    31    36    49
## 6 98^0   Give… 2000-08-19      51    39    34    26    26    19     2     2
## # … with 68 more variables: wk9 <dbl>, wk10 <dbl>, wk11 <dbl>, wk12 <dbl>,
## #   wk13 <dbl>, wk14 <dbl>, wk15 <dbl>, wk16 <dbl>, wk17 <dbl>,
## #   wk18 <dbl>, wk19 <dbl>, wk20 <dbl>, wk21 <dbl>, wk22 <dbl>,
## #   wk23 <dbl>, wk24 <dbl>, wk25 <dbl>, wk26 <dbl>, wk27 <dbl>,
## #   wk28 <dbl>, wk29 <dbl>, wk30 <dbl>, wk31 <dbl>, wk32 <dbl>,
## #   wk33 <dbl>, wk34 <dbl>, wk35 <dbl>, wk36 <dbl>, wk37 <dbl>,
## #   wk38 <dbl>, wk39 <dbl>, wk40 <dbl>, wk41 <dbl>, wk42 <dbl>,
## #   wk43 <dbl>, wk44 <dbl>, wk45 <dbl>, wk46 <dbl>, wk47 <dbl>,
## #   wk48 <dbl>, wk49 <dbl>, wk50 <dbl>, wk51 <dbl>, wk52 <dbl>,
## #   wk53 <dbl>, wk54 <dbl>, wk55 <dbl>, wk56 <dbl>, wk57 <dbl>,
## #   wk58 <dbl>, wk59 <dbl>, wk60 <dbl>, wk61 <dbl>, wk62 <dbl>,
## #   wk63 <dbl>, wk64 <dbl>, wk65 <dbl>, wk66 <lgl>, wk67 <lgl>,
## #   wk68 <lgl>, wk69 <lgl>, wk70 <lgl>, wk71 <lgl>, wk72 <lgl>,
## #   wk73 <lgl>, wk74 <lgl>, wk75 <lgl>, wk76 <lgl>

Arreglo de datos - tidy: unir columnas

# Reshape wide -> long: the 76 weekly columns wk1..wk76 collapse into two
# columns, "week" (the former column name) and "rank" (the cell value).
# NOTE(review): gather() is superseded by tidyr::pivot_longer(); it is kept
# here so the printed output that follows this chunk stays valid.
billboard %>%
  gather(key = "week", value = "rank", wk1:wk76)
## # A tibble: 24,092 x 5
##    artist         track                   date.entered week   rank
##    <chr>          <chr>                   <date>       <chr> <dbl>
##  1 2 Pac          Baby Don't Cry (Keep... 2000-02-26   wk1      87
##  2 2Ge+her        The Hardest Part Of ... 2000-09-02   wk1      91
##  3 3 Doors Down   Kryptonite              2000-04-08   wk1      81
##  4 3 Doors Down   Loser                   2000-10-21   wk1      76
##  5 504 Boyz       Wobble Wobble           2000-04-15   wk1      57
##  6 98^0           Give Me Just One Nig... 2000-08-19   wk1      51
##  7 A*Teens        Dancing Queen           2000-07-08   wk1      97
##  8 Aaliyah        I Don't Wanna           2000-01-29   wk1      84
##  9 Aaliyah        Try Again               2000-03-18   wk1      59
## 10 Adams, Yolanda Open My Heart           2000-08-26   wk1      76
## # … with 24,082 more rows

Arreglo de datos - tidy: filtrar valores

# Long format, keeping only rows that have a chart position.
billboard %>%
  gather(key = "week", value = "rank", wk1:wk76) %>%  # 76 columns -> week/rank
  filter(!is.na(rank))                                # drop rows with NA rank
## # A tibble: 5,307 x 5
##    artist         track                   date.entered week   rank
##    <chr>          <chr>                   <date>       <chr> <dbl>
##  1 2 Pac          Baby Don't Cry (Keep... 2000-02-26   wk1      87
##  2 2Ge+her        The Hardest Part Of ... 2000-09-02   wk1      91
##  3 3 Doors Down   Kryptonite              2000-04-08   wk1      81
##  4 3 Doors Down   Loser                   2000-10-21   wk1      76
##  5 504 Boyz       Wobble Wobble           2000-04-15   wk1      57
##  6 98^0           Give Me Just One Nig... 2000-08-19   wk1      51
##  7 A*Teens        Dancing Queen           2000-07-08   wk1      97
##  8 Aaliyah        I Don't Wanna           2000-01-29   wk1      84
##  9 Aaliyah        Try Again               2000-03-18   wk1      59
## 10 Adams, Yolanda Open My Heart           2000-08-26   wk1      76
## # … with 5,297 more rows

Arreglo de datos - tidy: cambiar nombre de columnas

# Long format without NA ranks, plus a numeric week index taken from the
# digits in the week label ("wk1" -> 1, "wk12" -> 12, ...).
billboard %>%
  gather(key = "week", value = "rank", wk1:wk76) %>%  # 76 columns -> week/rank
  filter(!is.na(rank)) %>%                            # drop rows with NA rank
  mutate(
    weeks = week %>%
      str_extract("[:digit:]+") %>%                   # keep the digits only
      as.numeric()
  )
## # A tibble: 5,307 x 6
##    artist         track                   date.entered week   rank weeks
##    <chr>          <chr>                   <date>       <chr> <dbl> <dbl>
##  1 2 Pac          Baby Don't Cry (Keep... 2000-02-26   wk1      87     1
##  2 2Ge+her        The Hardest Part Of ... 2000-09-02   wk1      91     1
##  3 3 Doors Down   Kryptonite              2000-04-08   wk1      81     1
##  4 3 Doors Down   Loser                   2000-10-21   wk1      76     1
##  5 504 Boyz       Wobble Wobble           2000-04-15   wk1      57     1
##  6 98^0           Give Me Just One Nig... 2000-08-19   wk1      51     1
##  7 A*Teens        Dancing Queen           2000-07-08   wk1      97     1
##  8 Aaliyah        I Don't Wanna           2000-01-29   wk1      84     1
##  9 Aaliyah        Try Again               2000-03-18   wk1      59     1
## 10 Adams, Yolanda Open My Heart           2000-08-26   wk1      76     1
## # … with 5,297 more rows

Arreglo de datos - tidy: crear nuevas variables a partir de otras

# Same pipeline, now deriving the calendar date of each charted week:
# week 1 is the entry date and every further week adds 7 days.
billboard %>%
  gather(key = "week", value = "rank", wk1:wk76) %>%  # 76 columns -> week/rank
  filter(!is.na(rank)) %>%                            # drop rows with NA rank
  mutate(
    weeks = as.numeric(str_extract(week, "[:digit:]+")),  # "wk1" -> 1, etc.
    date.out = date.entered + (weeks - 1) * 7             # date of that week
  )
## # A tibble: 5,307 x 7
##    artist       track             date.entered week   rank weeks date.out  
##    <chr>        <chr>             <date>       <chr> <dbl> <dbl> <date>    
##  1 2 Pac        Baby Don't Cry (… 2000-02-26   wk1      87     1 2000-02-26
##  2 2Ge+her      The Hardest Part… 2000-09-02   wk1      91     1 2000-09-02
##  3 3 Doors Down Kryptonite        2000-04-08   wk1      81     1 2000-04-08
##  4 3 Doors Down Loser             2000-10-21   wk1      76     1 2000-10-21
##  5 504 Boyz     Wobble Wobble     2000-04-15   wk1      57     1 2000-04-15
##  6 98^0         Give Me Just One… 2000-08-19   wk1      51     1 2000-08-19
##  7 A*Teens      Dancing Queen     2000-07-08   wk1      97     1 2000-07-08
##  8 Aaliyah      I Don't Wanna     2000-01-29   wk1      84     1 2000-01-29
##  9 Aaliyah      Try Again         2000-03-18   wk1      59     1 2000-03-18
## 10 Adams, Yola… Open My Heart     2000-08-26   wk1      76     1 2000-08-26
## # … with 5,297 more rows

Arreglo de datos - tidy: reordenar o seleccionar columnas

# Same pipeline, finishing with select() to keep six columns in a new order
# (the original "week" label column is dropped).
billboard %>%
  gather(key = "week", value = "rank", wk1:wk76) %>%  # 76 columns -> week/rank
  filter(!is.na(rank)) %>%                            # drop rows with NA rank
  mutate(
    weeks = as.numeric(str_extract(week, "[:digit:]+")),  # "wk1" -> 1, etc.
    date.out = date.entered + (weeks - 1) * 7             # date of that week
  ) %>%
  select(rank, date.entered, weeks, date.out, track, artist)  # reorder columns
## # A tibble: 5,307 x 6
##     rank date.entered weeks date.out   track                  artist       
##    <dbl> <date>       <dbl> <date>     <chr>                  <chr>        
##  1    87 2000-02-26       1 2000-02-26 Baby Don't Cry (Keep.… 2 Pac        
##  2    91 2000-09-02       1 2000-09-02 The Hardest Part Of .… 2Ge+her      
##  3    81 2000-04-08       1 2000-04-08 Kryptonite             3 Doors Down 
##  4    76 2000-10-21       1 2000-10-21 Loser                  3 Doors Down 
##  5    57 2000-04-15       1 2000-04-15 Wobble Wobble          504 Boyz     
##  6    51 2000-08-19       1 2000-08-19 Give Me Just One Nig.… 98^0         
##  7    97 2000-07-08       1 2000-07-08 Dancing Queen          A*Teens      
##  8    84 2000-01-29       1 2000-01-29 I Don't Wanna          Aaliyah      
##  9    59 2000-03-18       1 2000-03-18 Try Again              Aaliyah      
## 10    76 2000-08-26       1 2000-08-26 Open My Heart          Adams, Yolan…
## # … with 5,297 more rows

Arreglo de datos - tidy: reordenar valores

# Full tidy pipeline: reshape, drop NA ranks, derive week number and date,
# reorder the columns, then sort rows by chart position and date.
billboard %>%
  gather(key = "week", value = "rank", wk1:wk76) %>%  # 76 columns -> week/rank
  filter(!is.na(rank)) %>%                            # drop rows with NA rank
  mutate(
    weeks = as.numeric(str_extract(week, "[:digit:]+")),  # "wk1" -> 1, etc.
    date.out = date.entered + (weeks - 1) * 7             # date of that week
  ) %>%
  select(rank, date.entered, weeks, date.out, track, artist) %>%  # reorder columns
  arrange(rank, date.out)                             # sort by rank, then date
## # A tibble: 5,307 x 6
##     rank date.entered weeks date.out   track               artist          
##    <dbl> <date>       <dbl> <date>     <chr>               <chr>           
##  1     1 1999-11-27       8 2000-01-15 What A Girl Wants   Aguilera, Chris…
##  2     1 1999-11-27       9 2000-01-22 What A Girl Wants   Aguilera, Chris…
##  3     1 1999-10-23      15 2000-01-29 I Knew I Loved You  Savage Garden   
##  4     1 1999-10-23      16 2000-02-05 I Knew I Loved You  Savage Garden   
##  5     1 1999-10-23      17 2000-02-12 I Knew I Loved You  Savage Garden   
##  6     1 1999-12-11      11 2000-02-19 Thank God I Found … Carey, Mariah   
##  7     1 1999-10-23      19 2000-02-26 I Knew I Loved You  Savage Garden   
##  8     1 1999-06-05      40 2000-03-04 Amazed              Lonestar        
##  9     1 1999-06-05      41 2000-03-11 Amazed              Lonestar        
## 10     1 1999-12-25      13 2000-03-18 Say My Name         Destiny's Child 
## # … with 5,297 more rows

Arreglo de datos - tidy: reordenar valores

# Complete pipeline once more: wide -> long, remove NA ranks, add the numeric
# week and its date, pick/reorder columns and sort by rank and date.
billboard %>%
  gather(key = "week", value = "rank", wk1:wk76) %>%  # 76 columns -> week/rank
  filter(!is.na(rank)) %>%                            # drop rows with NA rank
  mutate(
    weeks = as.numeric(str_extract(week, "[:digit:]+")),  # "wk1" -> 1, etc.
    date.out = date.entered + (weeks - 1) * 7             # date of that week
  ) %>%
  select(rank, date.entered, weeks, date.out, track, artist) %>%  # reorder columns
  arrange(rank, date.out)                             # sort by rank, then date
## # A tibble: 5,307 x 6
##     rank date.entered weeks date.out   track               artist          
##    <dbl> <date>       <dbl> <date>     <chr>               <chr>           
##  1     1 1999-11-27       8 2000-01-15 What A Girl Wants   Aguilera, Chris…
##  2     1 1999-11-27       9 2000-01-22 What A Girl Wants   Aguilera, Chris…
##  3     1 1999-10-23      15 2000-01-29 I Knew I Loved You  Savage Garden   
##  4     1 1999-10-23      16 2000-02-05 I Knew I Loved You  Savage Garden   
##  5     1 1999-10-23      17 2000-02-12 I Knew I Loved You  Savage Garden   
##  6     1 1999-12-11      11 2000-02-19 Thank God I Found … Carey, Mariah   
##  7     1 1999-10-23      19 2000-02-26 I Knew I Loved You  Savage Garden   
##  8     1 1999-06-05      40 2000-03-04 Amazed              Lonestar        
##  9     1 1999-06-05      41 2000-03-11 Amazed              Lonestar        
## 10     1 1999-12-25      13 2000-03-18 Say My Name         Destiny's Child 
## # … with 5,297 more rows

Visualización

Tipos de visualización

ggplot2

The Grammar of Graphics

Las reglas gramaticales de los gráficos en ocasiones son matemáticas y otras estéticas.

Las matemáticas proporcionan herramientas simbólicas para representar abstracciones.

La estética, en el sentido griego original, ofrece principios para relacionar los atributos sensoriales (color, forma, sonido, etc.) con las abstracciones.

Wilkinson, 2005

Elementos gramaticales esenciales

Instalación

#install.packages("tidyverse")
#install.packages("ggplot2") 
#devtools::install_github("tidyverse/ggplot2")
library(tidyverse)
library(ggplot2)
library(plotly)
library(yaml)

Antes de graficar…

Datos tidy

Datos a graficar

# Download the R-Ladies chapter table (one row per chapter) and print a
# compact column-by-column summary of it.
capitulos_rladies <- readr::read_csv(
  file = "https://raw.githubusercontent.com/cienciadedatos/datos-de-miercoles/master/datos/2019/2019-06-26/capitulos_rladies.csv"
)
capitulos_rladies %>%
  glimpse()
## Observations: 160
## Variables: 7
## $ capitulo <chr> "R-Ladies Barcelona", "R-Ladies Ushuaia", "R-Ladies Bil…
## $ creacion <dttm> 2016-10-22 10:56:36, 2018-05-09 16:39:48, 2019-02-27 0…
## $ miembros <dbl> 389, 23, 38, 114, 1213, 46, 261, 30, 108, 292, 149, 658…
## $ latitud  <dbl> 41.40, -54.79, 43.25, -41.14, -37.81, 45.25, -30.04, 40…
## $ longitud <dbl> 2.17, -68.31, -2.93, -71.32, 144.96, 19.85, -51.22, -74…
## $ ciudad   <chr> "Barcelona", "Ushuaia", "Bilbao", "San Carlos de Barilo…
## $ pais     <chr> "ES", "AR", "ES", "AR", "AU", "RS", "BR", "US", "AU", "…

¿Qué tipo de variables tenemos?

discretas, continuas…

¿Qué queremos visualizar?

una, dos variables, más…

Ahora sí, vamos a graficar…

Estructura básica

ggplot(data,mapping=aes())

# Base plot object reused by the bar-chart chunks: country code on x, number
# of members on y. No geom layer is attached at this point.
cap <- ggplot(
  data = capitulos_rladies,
  mapping = aes(x = pais, y = miembros)
)
print(cap)

Agregar gráfico

geom_“gráfico”

# Add the bar layer. geom_col() uses the y values as bar heights, which is
# the same as geom_bar(stat = "identity").
cap +
  geom_col()

Modificar tamaño de letra

# Bar chart with adjusted font sizes: 12 pt for text in general and 5 pt for
# the x-axis tick labels.
# The size must be passed by name: the first positional argument of
# element_text() is `family`, so element_text(12) would set a font family
# called "12" instead of the size.
cap +
  geom_col() +
  theme(
    axis.text.x = element_text(size = 5),
    text = element_text(size = 12)
  )

Color y remover guías

# Colour each bar segment by chapter and hide the (very long) fill legend.
# - element_text() takes the size by name; its first positional argument is
#   the font family.
# - guides(fill = "none") replaces the deprecated guides(fill = FALSE).
cap +
  geom_col(mapping = aes(fill = capitulo)) +
  theme(
    axis.text.x = element_text(size = 5),
    text = element_text(size = 12)
  ) +
  guides(fill = "none")

Modificar color

# Same chart with a custom hue palette for the fill (luminance 40, chroma 60).
# - element_text() takes the size by name; its first positional argument is
#   the font family.
# - guides(fill = "none") replaces the deprecated guides(fill = FALSE).
cap +
  geom_col(mapping = aes(fill = capitulo)) +
  theme(
    axis.text.x = element_text(size = 5),
    text = element_text(size = 12)
  ) +
  guides(fill = "none") +
  scale_fill_hue(l = 40, c = 60)

Modificar color 2

# Same chart with a greyscale fill palette running from 0.1 to 0.9.
# - element_text() takes the size by name; its first positional argument is
#   the font family.
# - guides(fill = "none") replaces the deprecated guides(fill = FALSE).
cap +
  geom_col(mapping = aes(fill = capitulo)) +
  theme(
    axis.text.x = element_text(size = 5),
    text = element_text(size = 12)
  ) +
  guides(fill = "none") +
  scale_fill_grey(start = 0.1, end = 0.9)

Agregar texto

# Bar chart with a title and an x-axis label.
# - element_text() takes the size by name; its first positional argument is
#   the font family.
# - guides(fill = "none") replaces the deprecated guides(fill = FALSE).
cap +
  geom_col(mapping = aes(fill = capitulo)) +
  theme(
    axis.text.x = element_text(size = 5),
    text = element_text(size = 12)
  ) +
  guides(fill = "none") +
  scale_fill_hue(l = 40, c = 60) +
  ggtitle("Miembros de R-Ladies por país ") +
  xlab("país")

Orientación

# Titled bar chart drawn horizontally: coord_flip() swaps the x and y axes.
# - element_text() takes the size by name; its first positional argument is
#   the font family.
# - guides(fill = "none") replaces the deprecated guides(fill = FALSE).
cap +
  geom_col(mapping = aes(fill = capitulo)) +
  theme(
    axis.text.x = element_text(size = 5),
    text = element_text(size = 12)
  ) +
  guides(fill = "none") +
  scale_fill_hue(l = 40, c = 60) +
  ggtitle("Miembros de R-Ladies por país ") +
  xlab("país") +
  coord_flip()

Gráfico dinámico

# Store the titled bar chart in `bar` and hand it to plotly::ggplotly() to
# get an interactive version.
# - element_text() takes the size by name; its first positional argument is
#   the font family.
# - guides(fill = "none") replaces the deprecated guides(fill = FALSE).
bar <- cap +
  geom_col(mapping = aes(fill = capitulo)) +
  theme(
    axis.text.x = element_text(size = 5),
    text = element_text(size = 12)
  ) +
  guides(fill = "none") +
  scale_fill_hue(l = 40, c = 60) +
  ggtitle("Miembros de R-Ladies por país ") +
  xlab("país")
ggplotly(bar)

Otros gráficos..

# Scatter plot of members against chapter creation date, one panel per
# country, points coloured by chapter. The result is stored in `pai` and is
# not printed by this chunk.
# Fixes relative to the earlier version:
# - the points map `color`, not `fill`, so the hue palette has to be set with
#   scale_color_hue(); scale_fill_hue() had no effect here;
# - element_text(size = 5): the first positional argument is the font family;
# - the x axis shows `creacion`, so it is labelled as the creation date;
# - guides(color = "none") replaces the deprecated guides(color = FALSE).
pai <- ggplot(capitulos_rladies, aes(x = creacion, y = miembros)) +
  geom_point(aes(color = capitulo)) +
  scale_color_hue(l = 20, c = 100) +
  facet_wrap(~pais) +
  ggtitle("Miembros de R-Ladies por país y capítulo") +
  xlab("fecha de creación") +
  ylab("miembros") +
  theme(
    axis.text.x = element_text(size = 5),
    text = element_text(size = 8)
  ) +
  guides(color = "none")

Manipulación de datos

# Split the creation timestamp on "-" into year / month / day text columns,
# then plot members by creation month, one panel per year, points coloured
# by country. `yea.plotly` holds the interactive version of the plot.
# Fixes relative to the earlier version:
# - the points map `color`, not `fill`, so the hue palette has to be set with
#   scale_color_hue(); scale_fill_hue() had no effect here;
# - element_text(size = 5): the first positional argument is the font family;
# - the x axis shows `month`, so it is labelled "mes" rather than "país".
yea <- capitulos_rladies %>%
  # "day" keeps whatever follows the second "-", i.e. the day plus the time
  separate(creacion, sep = "-", into = c("year", "month", "day")) %>%
  ggplot(aes(x = month, y = miembros)) +
  geom_point(aes(color = pais)) +
  facet_wrap(~year) +
  scale_color_hue(l = 20, c = 100) +
  ggtitle("Miembros de R-Ladies por fecha de creación") +
  xlab("mes") +
  ylab("miembros") +
  theme(
    axis.text.x = element_text(size = 5),
    text = element_text(size = 12)
  )
yea.plotly <- ggplotly(yea)

Guardar gráficos estáticos

# Rebuild the titled bar chart and write it to "bar.png" (12 x 10, in
# ggsave()'s default units).
# - element_text() takes the size by name; its first positional argument is
#   the font family.
# - guides(fill = "none") replaces the deprecated guides(fill = FALSE).
# - `plot = bar` is passed explicitly so the saved figure does not depend on
#   whichever plot ggplot2 happens to remember as the last one.
bar <- cap +
  geom_col(mapping = aes(fill = capitulo)) +
  theme(
    axis.text.x = element_text(size = 5),
    text = element_text(size = 12)
  ) +
  guides(fill = "none") +
  scale_fill_hue(l = 40, c = 60) +
  ggtitle("Miembros de R-Ladies por país ") +
  xlab("país")
ggsave("bar.png", plot = bar, width = 12, height = 10)

Guardar gráficos dinámicos

# Write the interactive plotly widget to an HTML file.
# NOTE(review): "ploy" in the file name looks like a typo for "plotly";
# confirm before renaming, since other material may link to this file.
htmlwidgets::saveWidget(
  widget = yea.plotly,
  file = "yea.ploy.html"
)

Eventos R-Ladies

# Download the R-Ladies events table (one row per event) and print a compact
# column-by-column summary of it.
eventos_rladies <- readr::read_csv(
  file = "https://raw.githubusercontent.com/cienciadedatos/datos-de-miercoles/master/datos/2019/2019-06-26/eventos_rladies.csv"
)
eventos_rladies %>%
  glimpse()
## Observations: 1,534
## Variables: 6
## $ capitulo           <chr> "R-Ladies Barcelona", "R-Ladies Barcelona", "…
## $ titulo_evento      <chr> "¡Primer evento de R-Ladies Barcelona!", "Sca…
## $ fecha_local        <date> 2016-11-21, 2016-12-05, 2017-01-16, 2017-02-…
## $ hora_local         <time> 19:00:00, 19:00:00, 19:00:00, 19:00:00, 19:0…
## $ respuesta_asistire <dbl> 18, 58, 33, 38, 16, 31, 18, 1, 33, 36, 28, 30…
## $ descripcion_evento <chr> "<p>Estamos preparando el primer encuentro de…

Unir los archivos

# Combine chapter metadata with the event list on the chapter name.
# full_join() keeps rows from both tables even when there is no match.
rladies <- full_join(capitulos_rladies, eventos_rladies, by = "capitulo")

# Two-letter country codes used to filter the plot
# (presumably the Latin American chapters of interest - confirm the list).
latam <- c(
  "MX", "BZ", "GT", "HN", "SV", "NI", "CR", "PA", "CO", "VE",
  "EC", "BO", "PE", "CL", "AR", "BR", "UY", "PY", "SR", "GY"
)

# Attendance (RSVP count) over time for the selected countries, one panel per
# chapter. The plot is stored in `pr` and is not printed by this chunk.
# Fixes relative to the earlier version:
# - both layers map `color`, not `fill`, so the hue palette has to be set
#   with scale_color_hue(); scale_fill_hue() had no effect here;
# - element_text(size = 3): the first positional argument is the font family;
# - guides(color = "none") replaces the deprecated guides(color = FALSE).
pr <- rladies %>%
  filter(pais %in% latam) %>%
  ggplot(aes(x = fecha_local, y = respuesta_asistire)) +
  geom_point(aes(color = ciudad)) +
  geom_line(aes(color = pais)) +
  facet_wrap(~capitulo) +
  scale_color_hue(l = 20, c = 100) +
  guides(color = "none") +
  ggtitle("Reuniones R-ladies por año") +
  xlab("mes") +
  ylab("asistentes") +
  theme(
    axis.text.x = element_text(size = 3),
    text = element_text(size = 8)
  )